 //-----------------------------------------------------------------------------
//
//Copyright(c) 2020, ThorsianWay Technologies Co, Ltd
//All rights reserved.
//
//IP Name       :   pixel_shader
//File Name     :   uv_translate.v
//Module name   :   uv_translate
//Full name     :   matrix projection for texture coordinate 
//
//Author        :   zha daolu
//Email         :   
//Date          :   2020/5/13
//Version       :   V1.00
//
//Abstract      :   
//                  
//Called  by    :   GPU
//
//Modification history
//-----------------------------------------------------
//1.00: initial version 
//
//-----------------------------------------------------------------------------

//-----------------------------
//DEFINE MACRO
//----------------------------- 

module uv_translate
(
    input                                   SCAN_mode,                      //scan mode input; not referenced inside this module
    input                                   clk,                            //input clock
    input                                   rst_n,                          //input reset, low active
    input                                   bte_end,                        //input tile end; not referenced inside this module
    input[31:0]                             x,                              //input s coordinate, 1.15.16
    input[31:0]                             y,                              //input t coordinate, 1.15.16
    input                                   xy_en,                          //input st valid
    input                                   busy_in,                        //input busy; freezes every register stage of this module
    input signed[31:0]                      matrix_A,                       //input matrix A, 1.15.16
    input signed[31:0]                      matrix_B,                       //input matrix B, 1.15.16
    input signed[31:0]                      matrix_C,                       //input matrix C, 1.15.16
    input signed[31:0]                      matrix_D,                       //input matrix D, 1.15.16
    input signed[31:0]                      matrix_E,                       //input matrix E, 1.15.16
    input signed[31:0]                      matrix_F,                       //input matrix F, 1.15.16
    input signed[31:0]                      matrix_G,                       //input matrix G, 1.15.16
    input signed[31:0]                      matrix_H,                       //input matrix H, 1.15.16
    // u and v are driven directly by the quotient ports of the divider
    // instances below, so they must be nets: Verilog does not allow a reg to
    // be connected to a module-instance output port.
    output signed[31:0]                     u,                              //output s_trans, 1.3.28
    output signed[31:0]                     v,                              //output t_trans, 1.3.28
    output                                  uv_en,                          //output valid
    //output reg                              uv_end,
    output                                  busy_out                        //output busy; NOTE(review): never driven in this module - confirm intended source (busy_in?)
);
parameter UV_PRECISION = 28;                    //number of fraction bits wanted in the fixed point u/v results

localparam        FLOAT_BIAS  = 127;            //exponent bias of the 32 bit float format built in stage 2
localparam        INT_MSB_EXP = 31;             //binary weight of bit 63 of a 32.32 magnitude
localparam        MAN_SHIFT   = 41;             //64 - 23: keeps the 23 MSBs of a 64 bit word as mantissa
localparam [64:0] W_ONE       = 65'h100000000;  //constant 1.0 in the 32 fraction bit format of the products

//*******************
//stage 1 matrix mult
//*******************

// u_temp = ax + dy + g * 1
// v_temp = bx + ey + h * 1
// w_temp = cx + fy + 1
wire signed[63:0]                       ax;                                     //A*X  1.31.32
wire signed[63:0]                       dy;                                     //D*Y  1.31.32
wire signed[63:0]                       bx;                                     //B*X  1.31.32
wire signed[63:0]                       ey;                                     //E*Y  1.31.32
wire signed[63:0]                       cx;                                     //C*X  1.31.32
wire signed[63:0]                       fy;                                     //F*Y  1.31.32
wire                                    matrix_mul_valid;                       //mult output valid, taken from u_ax only
wire signed[64:0]                       u_temp;                                 //ax + dy + g
wire signed[64:0]                       v_temp;                                 //bx + ey + h
wire signed[64:0]                       w_temp;                                 //cx + fy + 1

// All six multipliers share clk, rst_n, en and busy. Only u_ax has its valid
// output connected; that single flag qualifies all six products.
mult_signed_32x32_stall u_ax
(
    .clk(clk),                //input clock
    .rst_n(rst_n),            //input reset, low active
    .en(xy_en),               //input mul enable
    .busy(busy_in),           //input busy
    .dina(x),                 //input a
    .dinb(matrix_A),          //input b
    .dout(ax),                //output a*b
    .valid(matrix_mul_valid)  //output valid
);

mult_signed_32x32_stall u_dy
(
    .clk(clk),                //input clock
    .rst_n(rst_n),            //input reset, low active
    .en(xy_en),               //input mul enable
    .busy(busy_in),           //input busy
    .dina(y),                 //input a
    .dinb(matrix_D),          //input b
    .dout(dy),                //output a*b
    .valid()                  //output valid, unused
);

mult_signed_32x32_stall u_bx
(
    .clk(clk),                //input clock
    .rst_n(rst_n),            //input reset, low active
    .en(xy_en),               //input mul enable
    .busy(busy_in),           //input busy
    .dina(x),                 //input a
    .dinb(matrix_B),          //input b
    .dout(bx),                //output a*b
    .valid()                  //output valid, unused
);

mult_signed_32x32_stall u_ey
(
    .clk(clk),                //input clock
    .rst_n(rst_n),            //input reset, low active
    .en(xy_en),               //input mul enable
    .busy(busy_in),           //input busy
    .dina(y),                 //input a
    .dinb(matrix_E),          //input b
    .dout(ey),                //output a*b
    .valid()                  //output valid, unused
);

mult_signed_32x32_stall u_cx
(
    .clk(clk),                //input clock
    .rst_n(rst_n),            //input reset, low active
    .en(xy_en),               //input mul enable
    .busy(busy_in),           //input busy
    .dina(x),                 //input a
    .dinb(matrix_C),          //input b
    .dout(cx),                //output a*b
    .valid()                  //output valid, unused
);

mult_signed_32x32_stall u_fy
(
    .clk(clk),                //input clock
    .rst_n(rst_n),            //input reset, low active
    .en(xy_en),               //input mul enable
    .busy(busy_in),           //input busy
    .dina(y),                 //input a
    .dinb(matrix_F),          //input b
    .dout(fy),                //output a*b
    .valid()                  //output valid, unused
);

// Each product is sign extended by one bit so the three-term sum cannot wrap.
// G and H (16 fraction bits) are shifted left by 16 and sign extended so that
// their binary point lines up with the 32 fraction bits of the products.
assign  u_temp = $signed({ax[63],ax}) + $signed({dy[63],dy}) + $signed({{16{matrix_G[31]}},matrix_G,16'b0});
assign  v_temp = $signed({bx[63],bx}) + $signed({ey[63],ey}) + $signed({{16{matrix_H[31]}},matrix_H,16'b0});
assign  w_temp = $signed({cx[63],cx}) + $signed({fy[63],fy}) + W_ONE;

//*******************************************************
//stage 2 convert 64bit matrix mul result to 32 bit float
//*******************************************************

wire [6:0] w_lz; //leading zero count of w_unsigned, from clz_64
wire [6:0] s_lz; //leading zero count of s_unsigned, from clz_64
wire [6:0] t_lz; //leading zero count of t_unsigned, from clz_64

// Magnitudes of the three sums: negate when the sign bit (bit 64) is set,
// then keep the low 64 bits.
wire [63:0] w_unsigned;  //|w_temp|
wire [63:0] s_unsigned;  //|u_temp|
wire [63:0] t_unsigned;  //|v_temp|

assign w_unsigned = w_temp[64] ? (-w_temp) : w_temp;
assign s_unsigned = u_temp[64] ? (-u_temp) : u_temp;
assign t_unsigned = v_temp[64] ? (-v_temp) : v_temp;

reg [31:0] s_float;  //32bit float form of u_temp: {sign, 8 bit exp, 23 bit mantissa}
reg [31:0] t_float;  //32bit float form of v_temp: {sign, 8 bit exp, 23 bit mantissa}
reg [31:0] w_float;  //32bit float form of w_temp: {sign, 8 bit exp, 23 bit mantissa}

// Exponents. For a 32.32 magnitude the leading one sits at bit (63 - lz), so
// its unbiased exponent is (31 - lz); FLOAT_BIAS turns that into a biased
// exponent. The numerators additionally get UV_PRECISION added, which scales
// them by 2**UV_PRECISION so the quotient keeps UV_PRECISION fraction bits.
// NOTE(review): an all-zero magnitude is not special-cased here; the result
// then depends on what clz_64 reports for a zero input - confirm.
wire [7:0] s_exp;  //exp of u_temp, pre-scaled by 2**UV_PRECISION
wire [7:0] t_exp;  //exp of v_temp, pre-scaled by 2**UV_PRECISION
wire [7:0] w_exp;  //exp of w_temp

assign s_exp = FLOAT_BIAS + INT_MSB_EXP - s_lz + UV_PRECISION;
assign t_exp = FLOAT_BIAS + INT_MSB_EXP - t_lz + UV_PRECISION;
assign w_exp = FLOAT_BIAS + INT_MSB_EXP - w_lz;

// Mantissas. Shifting left by (lz + 1) pushes the leading one (the implicit
// bit) out of the 64 bit word and leaves the bits that follow it at the MSB
// end; shifting right by MAN_SHIFT keeps the top 23 of them (truncation).
wire [22:0] s_man;  //mantissa of s_unsigned
wire [22:0] t_man;  //mantissa of t_unsigned
wire [22:0] w_man;  //mantissa of w_unsigned

assign s_man = (s_unsigned << (s_lz + 1)) >> MAN_SHIFT;
assign t_man = (t_unsigned << (t_lz + 1)) >> MAN_SHIFT;
assign w_man = (w_unsigned << (w_lz + 1)) >> MAN_SHIFT;

reg s_div_w_type; // sign of s/w: set when u_temp and w_temp have different signs
reg t_div_w_type; // sign of t/w: set when v_temp and w_temp have different signs
reg div_en;       // div enable, matrix_mul_valid delayed by one unstalled clock

// Float operand registers: capture a new set of operands only when the
// pipeline is not stalled and the multipliers flag a valid result.
always@(posedge clk or negedge rst_n)
begin
   if(~rst_n)
   begin
        s_float <= 32'b0;
        t_float <= 32'b0;
        w_float <= 32'b0;
        s_div_w_type <= 1'b0;
        t_div_w_type <= 1'b0;
   end
   else if(~busy_in && matrix_mul_valid)
   begin
        s_float <= {u_temp[64],s_exp,s_man};
        t_float <= {v_temp[64],t_exp,t_man};
        w_float <= {w_temp[64],w_exp,w_man};
        s_div_w_type <= u_temp[64] ^ w_temp[64];
        t_div_w_type <= v_temp[64] ^ w_temp[64];
   end
end

// Divider enable follows matrix_mul_valid while not stalled, so it lines up
// with the operand registers above; it holds its value while busy_in is high.
always@(posedge clk or negedge rst_n)
begin
   if(~rst_n)
   begin
        div_en <= 1'b0;
   end
   else if(~busy_in)
   begin
        div_en <= matrix_mul_valid;
   end
end

// count leading zeros of the three magnitudes
clz_64 u_clz_64_w(
    .Data_in(w_unsigned),
    .LZB_out(w_lz)
);

clz_64 u_clz_64_s(
    .Data_in(s_unsigned),
    .LZB_out(s_lz)
);

clz_64 u_clz_64_t(
    .Data_in(t_unsigned),
    .LZB_out(t_lz)
);

wire div_valid;   //valid flag of u_sdivw; also used to qualify v

//******************************
// stage 3 float div for u/w,v/w
// *****************************

// Both dividers take float operands (type 2'b10). type_q is {1'b0, sign}, so
// per the encoding listed below the quotient is requested as signed (2'b01)
// when the operand signs differ and as unsigned (2'b00) otherwise.
gc_ifdiv u_sdivw(
    .clk(clk),                     //input clock
    .rst_n(rst_n),                 //input reset, low active
    .busy(busy_in),                //input busy
    .en(div_en),                   //input enable
    .type_a(2'b10),                //input a type,2'b10:float 2'b01:signed 2'b00:unsigned
    .type_b(2'b10),                //input b type,2'b10:float 2'b01:signed 2'b00:unsigned
    .a(s_float),                   //input a
    .b(w_float),                   //input b
    .type_q({1'b0,s_div_w_type}),  //quotient type,2'b10:float 2'b01:signed 2'b00:unsigned
    .valid(div_valid),             //output valid
    .quotient(u)                   //output data
);

gc_ifdiv u_tdivw(
    .clk(clk),                     //input clock
    .rst_n(rst_n),                 //input reset, low active
    .busy(busy_in),                //input busy
    .en(div_en),                   //input enable
    .type_a(2'b10),                //input a type,2'b10:float 2'b01:signed 2'b00:unsigned
    .type_b(2'b10),                //input b type,2'b10:float 2'b01:signed 2'b00:unsigned
    .a(t_float),                   //input a
    .b(w_float),                   //input b
    .type_q({1'b0,t_div_w_type}),  //quotient type,2'b10:float 2'b01:signed 2'b00:unsigned
    .valid(),                      //output valid, unused (u_sdivw valid is used for both)
    .quotient(v)                   //output data
);

assign uv_en = div_valid && ~busy_in; //output valid, masked while stalled

endmodule 












